{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import getpass\n",
    "import os\n",
    "import sys\n",
    "\n",
    "from huggingface_hub import login\n",
    "from tqdm import trange\n",
    "from transformers import AutoModel,AutoTokenizer\n",
    "\n",
    "# Path constant; no other cell in this notebook reads it.\n",
    "FILE_PATH = './QA_results_GT.csv'\n",
    "\n",
    "# Do not hardcode the key in the notebook: reuse OPENAI_API_KEY when it is\n",
    "# already set, otherwise prompt for it once without echoing the input.\n",
    "if not os.environ.get(\"OPENAI_API_KEY\"):\n",
    "    os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OPENAI_API_KEY: \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ANA_FILE_PATH = './mthp_output.csv'\n",
    "\n",
    "# One list per CSV column; index i in every list refers to the same row.\n",
    "QUESTION_LIST = []\n",
    "GA_LIST = []\n",
    "naiveanswer_LIST = []\n",
    "lightraganswer_LIST = []\n",
    "minianswer_LIST = []\n",
    "\n",
    "# newline='' leaves line-ending handling to the csv module, so quoted fields\n",
    "# that contain line breaks are read back intact.\n",
    "with open(ANA_FILE_PATH, mode='r', encoding='utf-8', newline='') as question_file:\n",
    "    for row in csv.DictReader(question_file):\n",
    "        QUESTION_LIST.append(row['Question'])\n",
    "        GA_LIST.append(row['Gold Answer'])\n",
    "        naiveanswer_LIST.append(row['naive'])\n",
    "        lightraganswer_LIST.append(row['lightrag'])\n",
    "        minianswer_LIST.append(row['minirag'])\n",
    "\n",
    "# Number of loaded rows; the judging cells use it as their loop bound.\n",
    "filelength = len(QUESTION_LIST)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Judge prompt template. The judging cells fill it with str.format using the\n",
    "# fields {question}, {ga}, {naive}, {light} and {mini}.\n",
    "# The braces of the example output are doubled ({{ and }}) so that format()\n",
    "# emits them as literal { and } instead of treating them as fields.\n",
    "PROMPT = \"\"\"\n",
    "Now, I'll give you a question, a gold answer to this question, and three answers provided by different students.\n",
    "\n",
    "Determine the answer according to the following rules:\n",
    "If the answer is correct, get 1 point.\n",
    "If the answer is irrelevant to the question, it will receive 0 points.\n",
    "If the answer is incorrect, get -1 point.\n",
    "\n",
    "Return your answer in JSON mode.\n",
    "\n",
    "For example:\n",
    "\n",
    "Question:\n",
    "When does Li Hua arrive to the city?\n",
    "\n",
    "Gold Answer:\n",
    "20260105\n",
    "\n",
    "Answer1: LiHua arrived on the afternoon of January 5th\n",
    "Answer2: Sorry, there is no information about LiHua's arrival in the information you provided\n",
    "Answer3: There is no accurate answer in the information you provided, but according to the first information found, LiHua arrived on April 17th\n",
    "\n",
    "output:\n",
    "{{\n",
    "\"Score1\": 1,\n",
    "\"Score2\": 0,\n",
    "\"Score3\": -1,\n",
    "}}\n",
    "\n",
    "\n",
    "\n",
    "Real data:\n",
    "\n",
    "Question:\n",
    "{question}\n",
    "Gold Answer:\n",
    "{ga}\n",
    "\n",
    "Answer1: {naive}\n",
    "Answer2: {light}\n",
    "Answer3: {mini}\n",
    "\n",
    "output:\n",
    "\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#deepseek\n",
    "from openai import OpenAI\n",
    "\n",
    "# The notebook never defines My_deepseek_key, so a fresh kernel stopped here\n",
    "# with a NameError. Read the key from the environment instead.\n",
    "My_deepseek_key = os.environ.get(\"DEEPSEEK_API_KEY\")\n",
    "if not My_deepseek_key:\n",
    "    raise RuntimeError(\"Set the DEEPSEEK_API_KEY environment variable before running this cell.\")\n",
    "chatbot = OpenAI(api_key=My_deepseek_key, base_url=\"https://api.deepseek.com\")\n",
    "\n",
    "# Ask the judge model to score every row; chat_list[i] is the raw reply for row i.\n",
    "chat_list = []\n",
    "for i in trange(filelength):\n",
    "    p = PROMPT.format(question = QUESTION_LIST[i], ga = GA_LIST[i], naive = naiveanswer_LIST[i], light = lightraganswer_LIST[i], mini = minianswer_LIST[i])\n",
    "    chat_completion = chatbot.chat.completions.create(\n",
    "        messages=[\n",
    "            {\n",
    "                \"role\": \"system\",\n",
    "                \"content\":p,\n",
    "            },\n",
    "        ],\n",
    "        model=\"deepseek-chat\",\n",
    "        stream = False\n",
    "    )\n",
    "    chat_list.append(chat_completion.choices[0].message.content.strip())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#openai\n",
    "from openai import OpenAI\n",
    "from tqdm import trange\n",
    "\n",
    "# No key is passed here; the first cell puts OPENAI_API_KEY into os.environ.\n",
    "chatbot = OpenAI()\n",
    "\n",
    "# Collect one raw judge reply per row, in row order.\n",
    "chat_list = []\n",
    "for row_idx in trange(filelength):\n",
    "    judge_prompt = PROMPT.format(\n",
    "        question=QUESTION_LIST[row_idx],\n",
    "        ga=GA_LIST[row_idx],\n",
    "        naive=naiveanswer_LIST[row_idx],\n",
    "        light=lightraganswer_LIST[row_idx],\n",
    "        mini=minianswer_LIST[row_idx],\n",
    "    )\n",
    "    system_message = {\"role\": \"system\", \"content\": judge_prompt}\n",
    "    response = chatbot.chat.completions.create(\n",
    "        model=\"gpt-4o\",\n",
    "        messages=[system_message],\n",
    "    )\n",
    "    reply_text = response.choices[0].message.content\n",
    "    chat_list.append(reply_text.strip())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import json_repair\n",
    "\n",
    "# Keys expected in each judge reply. Score1/2/3 follow the Answer1/2/3 order\n",
    "# used when PROMPT is filled in (naive, lightrag, minirag).\n",
    "SCORE_KEYS = ('Score1', 'Score2', 'Score3')\n",
    "VALID_SCORES = (1, 0, -1)\n",
    "\n",
    "\n",
    "def strip_code_fence(text):\n",
    "    \"\"\"Drop a surrounding Markdown code fence (``` or ```json) from a reply.\n",
    "\n",
    "    str.strip('```json') removes any of the characters `, j, s, o, n from both\n",
    "    ends rather than the literal fence, so the fence is cut off explicitly.\n",
    "    \"\"\"\n",
    "    text = text.strip()\n",
    "    if text.startswith('```'):\n",
    "        text = text[3:]\n",
    "        if text[:4].lower() == 'json':\n",
    "            text = text[4:]\n",
    "    if text.endswith('```'):\n",
    "        text = text[:-3]\n",
    "    return text.strip()\n",
    "\n",
    "\n",
    "def parse_scores(reply):\n",
    "    \"\"\"Parse one judge reply into a dict holding every key of SCORE_KEYS.\n",
    "\n",
    "    A score that is missing, or is not one of 1, 0, -1, is stored as None, so\n",
    "    an unusable reply can no longer break the indexing further down.\n",
    "    \"\"\"\n",
    "    scores = dict.fromkeys(SCORE_KEYS)\n",
    "    try:\n",
    "        data = json_repair.loads(strip_code_fence(reply))\n",
    "    except Exception:\n",
    "        return scores\n",
    "    if not isinstance(data, dict):\n",
    "        # The repaired value is not guaranteed to be a JSON object.\n",
    "        return scores\n",
    "    for key in SCORE_KEYS:\n",
    "        value = data.get(key)\n",
    "        if isinstance(value, str):\n",
    "            try:\n",
    "                value = int(value.strip())\n",
    "            except ValueError:\n",
    "                continue\n",
    "        if value in VALID_SCORES:\n",
    "            scores[key] = int(value)\n",
    "    return scores\n",
    "\n",
    "\n",
    "chat_score_list = []\n",
    "for chat in chat_list:\n",
    "    scores = parse_scores(chat)\n",
    "    if None in scores.values():\n",
    "        print('Error in chat:', chat)\n",
    "    chat_score_list.append(scores)\n",
    "\n",
    "all_score1 = [data['Score1'] for data in chat_score_list]\n",
    "all_score2 = [data['Score2'] for data in chat_score_list]\n",
    "all_score3 = [data['Score3'] for data in chat_score_list]\n",
    "\n",
    "# Named total_count rather than `all` so the builtin all() is not shadowed.\n",
    "total_count = len(chat_score_list)\n",
    "\n",
    "\n",
    "def percent(count):\n",
    "    \"\"\"Return count as a percentage of total_count; 0.0 when nothing was judged.\"\"\"\n",
    "    return count / total_count * 100 if total_count else 0.0\n",
    "\n",
    "\n",
    "# For each score key: how many rows got 1, 0 and -1 (in that order).\n",
    "score_counts = {\n",
    "    key: [column.count(score) for score in VALID_SCORES]\n",
    "    for key, column in zip(SCORE_KEYS, (all_score1, all_score2, all_score3))\n",
    "}\n",
    "\n",
    "for key in SCORE_KEYS:\n",
    "    print(*score_counts[key])\n",
    "\n",
    "# '\\\\%' prints a backslash followed by %, the same text as before, without\n",
    "# relying on the invalid escape sequence '\\%'.\n",
    "for key in SCORE_KEYS:\n",
    "    pos, zero, neg = score_counts[key]\n",
    "    print(f\"{key} 1: {percent(pos):.2f}\\\\%, {key} 0: {percent(zero):.2f}\\\\%, {key} -1: {percent(neg):.2f}\\\\%\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Tianyu_agent",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
